#######################################
# Budget data et al. analysis:
# Descriptive plots and tables
#
# Replication Material for:
# Continuity or Change? (In)direct Rule in British and French Colonial Africa
# 
# Carl Mueller-Crepon, 2020
# International Organization
#
# File Description:
# Descriptive plots and figures, budget analysis
#
# Called from scripts/governance/analysis_budgetetal.R
#
################################


#######################################
# Correlation Matrix of Outcomes
# Appendix Figure A8
#######################################

# ... data
corr.data <- data.cross

# ... add Nigeria Admin
corr.data$staff.pc <- corr.data$staff/corr.data$pop
for(p in corr.data$province[corr.data$colony == "Nigeria"]){
  corr.data$staff.pc[corr.data$province == p] <- (data.admin$staff/data.admin$pop)[data.admin$province == p]
}

# ... make variables
var <- c("log(area)", "I(staff/pop)", "log(total.rev +1)","max.chief.class")
corr.data$area.log <- log(corr.data$area)
corr.data$total.rev.log <- log(corr.data$total.rev + 1)

# ... demean by colony
plot.data <- demeanlist(corr.data[,c("area.log","staff.pc","total.rev.log", "max.chief.class")],
                        list(as.factor(corr.data$colony)))
plot.data$colony <- corr.data$colony

# ... scatter plot matrix

png(paste0(fig.path, "outcome_scattermatrix.png"), 
    height = 7, width = 7, res = 300, unit = "in")
g <- ggpairs(data = corr.data[,c("area.log","staff.pc","total.rev.log", "max.chief.class")], 
             columnLabels = c("Area (log)", "Eur. Admin. p.c.", "Total revenue (log)", "Max. chief class"))
print(g)
dev.off()


#######################################
# Summary table of the data
# Table 3, main Paper
#######################################

# Header
head <- paste0("\\begin{table}[!htbp] \\centering ",
               "  \\caption{Data on the indirectness of colonial rule: Overview} ",
               "  \\label{tab.data.overview} ",
               "\\begin{tabular}{r",paste(rep("c",9),collapse = ""),"}")

# Columns: Colonies
colonies <- c("Nigeria", "Uganda", "Gold Coast", "Nyasaland",
              "Sierra Leone", "Tanzania", "Kenya", "Northern Rhodesia")
colonies <- c(colonies[order(colonies)], "French West Africa") 
col.header <- paste("& \\rot{ ", 
                    paste(colonies,collapse = " } & \\rot{ "),
                    " } \\\\ \\hline")

# Content
row.1 <- paste0("District size & ", paste(rep("yes", 9), collapse = " & "), "\\\\")
row.2 <- paste0("European administrators & ", paste(c("--","yes")[as.numeric(colonies %in% c("Nigeria", "Uganda"))+1], collapse = " & "), "\\\\")
row.3 <- paste0("Local budgets & ", paste(c("--","yes")[as.numeric(colonies %in% c("Nigeria", "Uganda", "Gold Coast", "Nyasaland","French West Africa"))+1], collapse = " & "), "\\\\")
row.4 <- paste0("Class of chiefs & ", paste(c("--","yes")[as.numeric(colonies %in% c("Nigeria"))+1], collapse = " & "), "\\\\")

# Footer
footer <- paste("\\hline", "\\end{tabular} \\end{table}  ")

# Combine everything
stub <- "data.overview"
fileConn<-file(paste0(tab.path, stub, ".tex"))
writeLines(c(head, col.header, row.1,row.2, row.3, row.4, footer),
           fileConn)
close(fileConn)


#############################
# Revenues over time
# Appendix Figure A6
#############################

png(paste0(fig.path, "revenues_ts.png"), 
    height = 5, width = 7, res = 300, unit = "in")
g <- ggplot(data.brit[data.brit$colony != "Kenya",], aes(x = year, y = log(1+total.rev/pop), group = geo.match)) + 
  geom_point(alpha = 0.5) + geom_line(alpha = 0.5) +
  ylab("Native Administration Revenues/capita (logged)") +
  xlab("Year") + facet_wrap(~ colony, nrow = 2) + 
  theme( legend.position="none")
print(g)
dev.off()

################################
# Revenues in space
# Figure 7 (main paper) and Appendix Figure A7
################################

# ... prepare and merge data
comb.dist <- as.character(dist.spdf$geo.match[grepl("|",dist.spdf$geo.match, fixed = T)])
drop.dist <- dist.spdf$geo.match[unlist(lapply(dist.spdf$geo.match, function(d){d %in% unlist(strsplit(comb.dist, split = "|", fixed = T))})) & !dist.spdf$geo.match %in% comb.dist]
these.dists <- dist.spdf[!dist.spdf$geo.match %in% drop.dist,]
these.dists@data <- join(these.dists@data, data.cross[,c("geo.match", "colony", "total.rev", "v33.num", "pop")])
these.dists <- these.dists[these.dists$colony %in% unique(data.cross$colony) & !is.na(these.dists$dist_id),]
# Plot: revenues

these.dists$plot.var <- log(1+these.dists@data$total.rev / these.dists@data$pop)
# ... colour palette
colpal <- viridis(256, option = "inferno")#colorRampPalette(c("blue","green"),space = "Lab")(256)
these.dists@data$colkey <- colpal[as.numeric(cut(these.dists$plot.var ,breaks = 255))]
these.dists@data$colkey[is.na(these.dists@data$colkey)] <- "darkgrey"

# ... Loop over colonies
legend.colony <- "Uganda"
for(colony in unique(these.dists@data$colony)){
  # ... Aspect Ratio
  ext.vec <- as.vector(extent(these.dists[these.dists$colony == colony,]))
  asp.ratio <- (ext.vec[2] - ext.vec[1])/(ext.vec[4] - ext.vec[3])
  # ... plot
  legend.map <- colony == legend.colony
  png(paste0(fig.path, "revenuemap.",colony, ".png"), 
      height = 4, width = ifelse(legend.map, 4*asp.ratio + 1, 4*asp.ratio), res = 300, unit = "in")
  
  
  # ... main
  par(mar = c(0,0,0,0))
  if(legend.map){layout(matrix(1:2,ncol=2), width = c(4,1),height = c(1,1))}
  
  plot(these.dists[these.dists$colony == colony,], 
       col = these.dists$colkey[these.dists$colony == colony], 
       border = "white", lwd = 0.3)
  plot(dist.spdf[dist.spdf$geo.match %in% drop.dist,], 
       border = "white", lwd = 0.3, lty = 3, add = T)
  
  # ... legend
  if(legend.map){
    legend_image <- as.raster(matrix(rev(colpal), ncol=1))
    plot(c(0,2),c(min(these.dists$plot.var, na.rm = T), 
                  max(these.dists$plot.var, na.rm = T)),type = 'n', axes = F,xlab = '', ylab = '', main = '')
    axis(side = 4,pos = 0.5)
    rasterImage(legend_image, 0,min(these.dists$plot.var, na.rm = T), 
                0.5, max(these.dists$plot.var, na.rm = T))
  }
  dev.off()
}

#######################################
# Summary statistics
# Table 4, main paper
#######################################

# ... subset data
data.temp <- data.brit[(!is.na(data.brit$total.rev) | !is.na(data.brit$total.exp)) & !is.na(data.brit$year),]

# ... number of districts
data.sum <- aggregate.data.frame(list(districts = data.temp$geo.match),
                                 list(colony = data.temp[,c("colony")]), FUN = function(x){length(unique(x))})
# ... start year
data.sum <- join(data.sum,
                 aggregate.data.frame(list(start.year = data.temp$year),
                                      list(colony = data.temp[,c("colony")]), FUN = min, na.rm = T))
# ... end year
data.sum <- join(data.sum,
                 aggregate.data.frame(list(end.year = data.temp$year),
                                      list(colony = data.temp[,c("colony")]), FUN = max, na.rm = T))

# ... number of years
data.sum <- join(data.sum,
                 aggregate.data.frame(list(year.num = data.temp$year),
                                      list(colony = data.temp[,c("colony")]), FUN = function(x){length(unique(x))}))

# ... mean revenue
data.sum <- join(data.sum,
                 aggregate.data.frame(list(revenue = data.temp$total.rev/data.temp$pop),
                                      list(colony = data.temp[,c("colony")]), FUN = mean, na.rm = T))

# ... mean expenditure
data.sum <- join(data.sum,
                 aggregate.data.frame(list(expenditure = data.temp$total.exp/data.temp$pop),
                                      list(colony = data.temp[,c("colony")]), FUN = mean, na.rm = T))
colnames(data.sum) <- c("Colony", "Districts", "Start", "End", 
                        "No. of years", "Avg. revenue", "Avg. expenditure")

# Make table
fileConn<-file(paste0(tab.path, "data.summary.tex"))
writeLines(
  stargazer(data.sum,
            title="Summary of native treasury data",
            notes.align = "l",label=paste0("tab.data.summary"),align =T,
            digits = 2, summary = F, rownames = F,digit.separate = 0,
            type  = output.type, column.sep.width = "0pt",
            notes = paste0("\\parbox[t]{",
.95,"\\textwidth}{\\textit{Notes:} Note that the number of observations in the data might 
be smaller than the number of existing districts, because some budget reports report numbers 
above the district level (e.g. Buganda, Uganda).}")), 
  fileConn)
close(fileConn)

#########################################
# Sources Table
# Appendix Table A8
#########################################

# Select vars
source.vars <- c("colony","Title","pages.broken", "id")
var.names <- c("Colony","Title", "Pages", "Microform ID")

# Make Line breaks
report.id$pages.broken <- paste0("\\specialcell{",unlist(lapply(report.id$pages, function(p){
  breaks <- length(seq(0, sapply(regmatches(p, gregexpr(";", p)), length), by = 5)) - 1
  if(breaks > 0){
    for(b in c(1:breaks)){p <- paste0(substr(p,1, gregexpr(";", p)[[1]][b*5]),
                                      "\\\\",
                                      substr(p,gregexpr(";", p)[[1]][b*5]+2, nchar(p)))}
    
  }
  p
})),"}")

# Make table

# ... Sort and name columns
report.tab <- report.id[,source.vars]
colnames(report.tab) <- var.names
report.tab$Colony[report.tab$Colony == "Ghana"] = "Gold Coast"

# ... header
head <- c("\\begin{table}[!htbp] \\centering ",
          "  \\caption{Sources of native treasury data} ",
          "  \\label{tab.data.source} ",
          "\\begin{tabular}{@{\\extracolsep{0pt}} D{.}{.}{-2} D{.}{.}{-2} D{.}{.}{-2} D{.}{.}{-2} } ",
          "\\\\[-1.8ex]\\hline ",
          "\\hline \\\\[-1.8ex] ")

# ... footer
foot <- c("\\hline \\\\[-1.8ex] "    ,                                                                                                                                                                                                                                                                                                                                                                                                               
          "\\multicolumn{4}{l}{\\begin{minipage}[t]{0.8\\columnwidth} Microform ID denotes the `Reference ID' used on \\url{www.britishonlinearchives.co.uk}. \\end{minipage}} \\\\ ",                                                                                                                                                                                                                                                               
          "\\end{tabular} " ,                                                                                                                                                                                                                                                                                                                                                                                                                        
          "\\end{table} " )

# ... column titles
col.tit <- paste0("\\multicolumn{1}{c}{",paste(colnames(report.tab), collapse = "} & \\multicolumn{1}{c}{"),"} \\\\" ,
                  "\\hline \\\\[-1.8ex] ")

# ... content
content <- paste(paste(paste0("\\multicolumn{1}{l}{",apply(report.tab, 1, paste, collapse = "} & \\multicolumn{1}{l}{"),"} " ),
                 collapse = " \\\\ \\\\[-1.8ex] "),
                 " \\\\ \\\\[-1.8ex] ")


# Save table
fileConn<-file(paste0(tab.path, "data.source.tex"))
writeLines(
  c(head, col.tit, content, foot), fileConn)
close(fileConn)


################################################
# Summary statistics
# Appendix Table A9
################################################

# Init
sum.vars <- c(main.expl, "pop","area",base.controls.abs,nature.controls,ethnic.controls)
sum.vars <- sum.vars[!grepl("_colfrnc",sum.vars)]
sum.vars <- c("total.rev",rev.types, "total.exp", exp.types,sum.vars)
sum.stats.df <- data.cross[,sum.vars]

# Log vars (as in analysis)

# ... + 1
log.vars <- c("total.rev",rev.types, "total.exp", exp.types,"pop","area")
sum.stats.df[, log.vars] <- log(1+sum.stats.df[, log.vars])

# ... simple
log.vars <- c("area")
sum.stats.df[, log.vars] <- log(sum.stats.df[, log.vars])

# Labels
sum.var.labs <- c("Total revenue (log)", "Revenue from: Taxes (log)", "... Fees \\& fines (log)", "... Transfers (log)","... Other (log)",
                  "Total expenitures (log)", "Expenditures on: Administration (log)", "... Order (log)", "... Education \\& health (log)",
                  "... Agriculture (log)", "... Public works (log)", "... Other (log)",
                  "Precolonial centralization", 
                  "Population (log)", "Area (log)", "Population density (log, 1880)", "Ethnic groups' pop. density (log, 1880)", 
                  "Distance to coast (log)", "Distance to nav. river (log)","Median altitude",
                  "Median slope","Mean temperature","Evapotranspiration","Precipitation","Evapotranspiration/Precipitation",
                  "Agricultural suitability", "Cash crop suitability","Reliance on agriculture",
                  "Reliance on pastoralism", "Intensity of agriculture")

# Print to table
fileConn<-file(paste0(tab.path, "budget.sumtab.tex"))
writeLines(
  stargazer(sum.stats.df,
            title="Summary of British budget data",
            covariate.labels = sum.var.labs,
            notes.align = "l",label=paste0("tab.budget.summary"),align =F,
            digits = 2,  rownames = F,digit.separate = 0,
            type  = output.type), 
  fileConn)
close(fileConn)
